#!/usr/bin/env jython
"""
Reads in a specific text file and detects named entities
Run by: jython ner.py <filename> <language-code>
"""


# Fix classpath scanning - otherise uimaFIT will not find the UIMA types
from java.lang import Thread
from org.python.core.imp import *
Thread.currentThread().contextClassLoader = getSyspathJavaLoader()

# Dependencies and imports for DKPro modules
from jip.embed import require

# Text Reader
require('de.tudarmstadt.ukp.dkpro.core:de.tudarmstadt.ukp.dkpro.core.io.text-asl:1.6.1')
from de.tudarmstadt.ukp.dkpro.core.io.text import *

# StanfordNlp
require('de.tudarmstadt.ukp.dkpro.core:de.tudarmstadt.ukp.dkpro.core.stanfordnlp-gpl:1.6.1')
from de.tudarmstadt.ukp.dkpro.core.stanfordnlp import *


# OpenNlp
require('de.tudarmstadt.ukp.dkpro.core:de.tudarmstadt.ukp.dkpro.core.opennlp-asl:1.6.1')
from de.tudarmstadt.ukp.dkpro.core.opennlp import *


# Dependencies for selecting specific annotations like sentences/tokens
from de.tudarmstadt.ukp.dkpro.core.api.segmentation.type import * #Token, Sentence, Lemma, Stem 
from de.tudarmstadt.ukp.dkpro.core.api.syntax.type import * #Chunk, PennTree
from de.tudarmstadt.ukp.dkpro.core.api.ner.type import * #NamedEntitiy 

# uimaFIT imports
from org.apache.uima.fit.util.JCasUtil import *
from org.apache.uima.fit.pipeline.SimplePipeline import *
from org.apache.uima.fit.factory.CollectionReaderFactory import *
from org.apache.uima.fit.factory.AnalysisEngineFactory import *


# Access to commandline arguments
import sys

# Check that all necessary arguments have been passed to the program
if len(sys.argv) < 3:
    print globals()['__doc__'] % locals()
    sys.exit(1)



# Assemble and run pipeline
pipeline = iteratePipeline(
  createReaderDescription(TextReader,
    TextReader.PARAM_PATH, sys.argv[1],
    TextReader.PARAM_LANGUAGE, sys.argv[2],
     ),
  createEngineDescription(OpenNlpSegmenter),
  createEngineDescription(StanfordNamedEntityRecognizer));

for jcas in pipeline:
    print "Named Entities:"
    for ne in select(jcas, NamedEntity):
        print ne.coveredText+"\t"+ne.value
